# time plot
df_drug_monthly |>
autoplot(Sales) +
labs(title = "Monthly sales of drugs") +
facet_wrap(vars(Drug), scales = "free_y", ncol = 2) +
theme(legend.position = "none")
# Seasonal subseries plots
df_drug_monthly |>
gg_subseries(Sales) +
labs(title = "Monthly sales of drugs") +
theme(legend.position = "none")
df_drug_monthly |>
ggplot(aes(x = as.factor(month(Month)), y = Sales)) +
geom_boxplot() +
facet_wrap(~ Drug, scales = "free_y", ncol = 2)
df_drug_monthly |>
gg_season(Sales)
# time plot
df_drug_weekly |>
autoplot(Sales) +
labs(title = "weekly sales of drugs") +
facet_wrap(vars(Drug), scales = "free_y", ncol = 2) +
theme(legend.position = "none")
# Seasonal subseries plots
df_drug_weekly |>
gg_subseries(Sales) +
labs(title = "weekly sales of drugs") +
theme(legend.position = "none")
df_drug_weekly |>
ggplot(aes(x = as.factor(week(Week)), y = Sales)) +
geom_boxplot() +
facet_wrap(~ Drug, scales = "free_y", ncol = 1)
df_drug_weekly |>
gg_season(Sales)
# time plot
df_drug_daily |>
autoplot(Sales) +
labs(title = "daily sales of drugs") +
facet_wrap(vars(Drug), scales = "free_y", ncol = 2) +
theme(legend.position = "none")
# Seasonal subseries plots
df_drug_daily |>
gg_subseries(Sales, period = "week") +
labs(title = "daily sales of drugs") +
theme(legend.position = "none")
df_drug_daily |>
ggplot(aes(x = weekdays(Date), y = Sales)) +
geom_boxplot() +
facet_wrap(~ Drug, scales = "free_y", ncol = 2)
df_drug_daily |>
gg_season(Sales, period = "week")
df_drug_daily |>
gg_season(Sales, period = "month")
df_drug_daily |>
gg_season(Sales, period = "year")
# time plot
df_drug_hourly |>
autoplot(Sales) +
labs(title = "hourly sales of drugs") +
facet_wrap(vars(Drug), scales = "free_y", ncol = 1) +
theme(legend.position = "none")
# Seasonal subseries plots
df_drug_hourly |>
gg_subseries(Sales, period = "day") +
labs(title = "daily sales of drugs") +
theme(legend.position = "none")
df_drug_hourly |>
ggplot(aes(x = as.factor(hour(Datetime)), y = Sales)) +
geom_boxplot() +
facet_wrap(~ Drug, scales = "free_y", ncol = 1)
df_drug_hourly |>
gg_season(Sales, period = "day")
From the above, it can be estimated that this data has a one-year seasonality, and therefore, weekly or monthly data are candidates for data that can be utilized in the forecast. Since there are some missing data of unknown reason in the monthly data, it is appropriate to use the weekly data in this analysis.
In the following, weekly data is used to perform Moving average smoothing and STL Decomposition.
## Moving average smoothing
# add the 52-MA and 52x2-MA to the data set
df_drug_weekly <- df_drug_weekly |>
group_by(Drug) |>
mutate(
MA_52 = slider::slide_dbl(Sales, mean,
.before = 25, .after = 26, .complete = TRUE),
MA_52x2 = slider::slide_dbl(MA_52, mean,
.before = 1, .after = 0, .complete = TRUE)
) |>
ungroup()
# plot 52x2-MA
df_drug_weekly |>
autoplot(Sales) +
geom_line(aes(y = MA_52x2, color = "52-week Moving Average"), size =1) +
labs(title = "Weekly Sales of Drugs with 52-week Moving Average") +
facet_wrap(vars(Drug), scales = "free_y", ncol = 2) +
theme(legend.position = "bottom")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 52 rows containing missing values or values outside the scale range
## (`geom_line()`).
Add some implications from the moving
average smoothing.
## Decomposition
# function for Classical additive decomposition
CL_dcmp <- function(drug_code) {
df_drug_weekly |>
filter(Drug == drug_code) |>
model(
classical_decomposition(Sales, type = "additive")
) |>
components() |>
autoplot() +
ggtitle(paste("Classical additive decomposition for Drug:", drug_code))
}
# apply Classical additive to all unique drug code
lapply(unique(df_drug_weekly$Drug), CL_dcmp)
## [[1]]
## Warning: Removed 26 rows containing missing values or values outside the scale range
## (`geom_line()`).
##
## [[2]]
## Warning: Removed 26 rows containing missing values or values outside the scale range
## (`geom_line()`).
##
## [[3]]
## Warning: Removed 26 rows containing missing values or values outside the scale range
## (`geom_line()`).
##
## [[4]]
## Warning: Removed 26 rows containing missing values or values outside the scale range
## (`geom_line()`).
##
## [[5]]
## Warning: Removed 26 rows containing missing values or values outside the scale range
## (`geom_line()`).
##
## [[6]]
## Warning: Removed 26 rows containing missing values or values outside the scale range
## (`geom_line()`).
##
## [[7]]
## Warning: Removed 26 rows containing missing values or values outside the scale range
## (`geom_line()`).
##
## [[8]]
## Warning: Removed 26 rows containing missing values or values outside the scale range
## (`geom_line()`).
# function for STL decomposition
STL_dcmp <- function(drug_code) {
df_drug_weekly |>
filter(Drug == drug_code) |>
model(
STL(Sales ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)
) |>
components() |>
autoplot() +
ggtitle(paste("STL Decomposition for Drug:", drug_code))
}
# apply STL decomposition to all unique drug code
lapply(unique(df_drug_weekly$Drug), STL_dcmp)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
Add some implications from the
decomposition.
In the following, weekly data is used.
## ACF and PACF
# ACF plot
df_drug_weekly |>
ACF(Sales) |>
autoplot() +
facet_wrap(vars(Drug), scales = "free_y", ncol = 2)
# PACF plot
df_drug_weekly |>
PACF(Sales) |>
autoplot() +
facet_wrap(vars(Drug), scales = "free_y", ncol = 2)
Add some implications .
# function for ADF test
ADF_test <- function(drug_code) {
print(drug_code)
df_drug_weekly |>
filter(Drug == drug_code) |>
pull(Sales) |>
tseries::adf.test() |>
print()
}
# apply ADF test to all unique drug code
for (drug_code in unique(df_drug_weekly$Drug)) {
ADF_test(drug_code)
}
## [1] "M01AB"
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Augmented Dickey-Fuller Test
##
## data: pull(filter(df_drug_weekly, Drug == drug_code), Sales)
## Dickey-Fuller = -3.6996, Lag order = 6, p-value = 0.02442
## alternative hypothesis: stationary
##
## [1] "M01AE"
##
## Augmented Dickey-Fuller Test
##
## data: pull(filter(df_drug_weekly, Drug == drug_code), Sales)
## Dickey-Fuller = -5.4432, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
## [1] "N02BA"
##
## Augmented Dickey-Fuller Test
##
## data: pull(filter(df_drug_weekly, Drug == drug_code), Sales)
## Dickey-Fuller = -4.6998, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
## [1] "N02BE"
##
## Augmented Dickey-Fuller Test
##
## data: pull(filter(df_drug_weekly, Drug == drug_code), Sales)
## Dickey-Fuller = -3.9667, Lag order = 6, p-value = 0.01106
## alternative hypothesis: stationary
##
## [1] "N05B"
##
## Augmented Dickey-Fuller Test
##
## data: pull(filter(df_drug_weekly, Drug == drug_code), Sales)
## Dickey-Fuller = -3.7337, Lag order = 6, p-value = 0.02271
## alternative hypothesis: stationary
##
## [1] "N05C"
##
## Augmented Dickey-Fuller Test
##
## data: pull(filter(df_drug_weekly, Drug == drug_code), Sales)
## Dickey-Fuller = -6.1986, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
##
## [1] "R03"
##
## Augmented Dickey-Fuller Test
##
## data: pull(filter(df_drug_weekly, Drug == drug_code), Sales)
## Dickey-Fuller = -3.4184, Lag order = 6, p-value = 0.05161
## alternative hypothesis: stationary
##
## [1] "R06"
##
## Augmented Dickey-Fuller Test
##
## data: pull(filter(df_drug_weekly, Drug == drug_code), Sales)
## Dickey-Fuller = -3.772, Lag order = 6, p-value = 0.0208
## alternative hypothesis: stationary
Add some implications .
## Forecasting
# use 52 weeks as a test set
train <- df_drug_weekly |>
filter_index("2014 W01" ~ "2018 W41")
test <- df_drug_weekly |>
filter_index("2018 W42" ~ "2019 W41")
# Fit the models
base_fit <- train |>
model(
Mean = MEAN(Sales),
`Naïve` = NAIVE(Sales),
`Seasonal naïve` = SNAIVE(Sales)
)
# Generate forecasts for 52 weeks
base_fc <- base_fit |> forecast(h = 52)
# Plot forecasts against actual values
base_fc |>
autoplot(test, level = NULL) +
autolayer(
filter_index(df_drug_weekly, "2018 W42" ~ .),
colour = "black"
) +
labs(
y = "Sales",
title = "Base Forecasts for weekly Drug Sales (for test data)"
) +
guides(colour = guide_legend(title = "Forecast")) +
facet_wrap(vars(Drug), scales = "free_y", ncol = 2)
## Plot variable not specified, automatically selected `.vars = Sales`
# calculate accuracy
base_ac <- accuracy(base_fc, test)
# select RMSE and MAPE and convert it to wide data
base_rmse <- base_ac |>
select(.model, Drug, RMSE) |>
pivot_wider(names_from = Drug, values_from = RMSE)
base_rmse
## # A tibble: 3 × 9
## .model M01AB M01AE N02BA N02BE N05B N05C R03 R06
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mean 8.52 9.77 8.97 82.2 12.7 2.88 36.1 13.4
## 2 Naïve 9.27 9.97 5.65 88.0 30.8 3.35 30.7 12.9
## 3 Seasonal naïve 10.3 10.3 7.22 60.0 21.2 4.66 33.8 11.7
base_mape <- base_ac |>
select(.model, Drug, MAPE) |>
pivot_wider(names_from = Drug, values_from = MAPE)
base_mape
## # A tibble: 3 × 9
## .model M01AB M01AE N02BA N02BE N05B N05C R03 R06
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mean 22.1 33.8 52.8 36.5 21.3 Inf 50.2 84.3
## 2 Naïve 23.0 36.8 28.4 44.6 56.2 Inf 72.6 93.5
## 3 Seasonal naïve 27.5 34.8 37.2 24.4 31.8 Inf 61.9 72.0
## ARIMA Models
# Fit the models
arima_fit <- train |>
model(ARIMA(Sales))
arima_fit
## # A mable: 8 x 2
## # Key: Drug [8]
## Drug `ARIMA(Sales)`
## <chr> <model>
## 1 M01AB <ARIMA(1,1,1)>
## 2 M01AE <ARIMA(3,0,1) w/ mean>
## 3 N02BA <ARIMA(0,1,2)(1,0,0)[52]>
## 4 N02BE <ARIMA(0,1,1)(1,1,0)[52]>
## 5 N05B <ARIMA(0,1,3)(1,0,0)[52]>
## 6 N05C <ARIMA(1,0,0) w/ mean>
## 7 R03 <ARIMA(0,1,1)(0,0,1)[52]>
## 8 R06 <ARIMA(0,0,3)(0,1,0)[52] w/ drift>
# Generate forecasts for 52 weeks
arima_fc <- arima_fit |> forecast(h = 52)
# Plot forecasts against actual values
arima_fc |>
autoplot(test, level = NULL) +
autolayer(
filter_index(df_drug_weekly, "2018 W42" ~ .),
colour = "black"
) +
labs(
y = "Sales",
title = "ARIMA Forecasts for weekly Drug Sales (for test data)"
) +
guides(colour = guide_legend(title = "Forecast")) +
facet_wrap(vars(Drug), scales = "free_y", ncol = 2)
## Plot variable not specified, automatically selected `.vars = Sales`
# calculate accuracy
arima_ac <- accuracy(arima_fc, test)
# select RMSE and MAPE and convert it to wide data
arima_rmse <- arima_ac |>
select(.model, Drug, RMSE) |>
pivot_wider(names_from = Drug, values_from = RMSE)
arima_rmse
## # A tibble: 1 × 9
## .model M01AB M01AE N02BA N02BE N05B N05C R03 R06
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(Sales) 8.82 9.74 5.62 70.5 15.1 2.86 30.3 11.7
arima_mape <- arima_ac |>
select(.model, Drug, MAPE) |>
pivot_wider(names_from = Drug, values_from = MAPE)
arima_mape
## # A tibble: 1 × 9
## .model M01AB M01AE N02BA N02BE N05B N05C R03 R06
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(Sales) 22.4 33.4 28.8 29.4 25.1 Inf 67.6 77.5
## ETS Models
# Fit the models
ets_fit <- train |>
model(ETS(Sales))
## Warning: Seasonal periods (`period`) of length greather than 24 are not supported by ETS. Seasonality will be ignored.
## Seasonal periods (`period`) of length greather than 24 are not supported by ETS. Seasonality will be ignored.
## Seasonal periods (`period`) of length greather than 24 are not supported by ETS. Seasonality will be ignored.
## Seasonal periods (`period`) of length greather than 24 are not supported by ETS. Seasonality will be ignored.
## Seasonal periods (`period`) of length greather than 24 are not supported by ETS. Seasonality will be ignored.
## Seasonal periods (`period`) of length greather than 24 are not supported by ETS. Seasonality will be ignored.
## Seasonal periods (`period`) of length greather than 24 are not supported by ETS. Seasonality will be ignored.
## Seasonal periods (`period`) of length greather than 24 are not supported by ETS. Seasonality will be ignored.
ets_fit
## # A mable: 8 x 2
## # Key: Drug [8]
## Drug `ETS(Sales)`
## <chr> <model>
## 1 M01AB <ETS(M,N,N)>
## 2 M01AE <ETS(A,N,N)>
## 3 N02BA <ETS(M,N,N)>
## 4 N02BE <ETS(M,N,N)>
## 5 N05B <ETS(M,N,N)>
## 6 N05C <ETS(A,Ad,N)>
## 7 R03 <ETS(A,N,N)>
## 8 R06 <ETS(M,A,N)>
# Generate forecasts for 52 weeks
ets_fc <- ets_fit |> forecast(h = 52)
## Warning: There were 8 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `ETS(Sales) = (function (object, ...) ...`.
## Caused by warning:
## ! Seasonal periods (`period`) of length greather than 24 are not supported by ETS. Seasonality will be ignored.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 7 remaining warnings.
# Plot forecasts against actual values
ets_fc |>
autoplot(test, level = NULL) +
autolayer(
filter_index(df_drug_weekly, "2018 W42" ~ .),
colour = "black"
) +
labs(
y = "Sales",
title = "ETS Forecasts for weekly Drug Sales (for test data)"
) +
guides(colour = guide_legend(title = "Forecast")) +
facet_wrap(vars(Drug), scales = "free_y", ncol = 2)
## Plot variable not specified, automatically selected `.vars = Sales`
# calculate accuracy
ets_ac <- accuracy(ets_fc, test)
# select RMSE and MAPE and convert it to wide data
ets_rmse <- ets_ac |>
select(.model, Drug, RMSE) |>
pivot_wider(names_from = Drug, values_from = RMSE)
ets_rmse
## # A tibble: 1 × 9
## .model M01AB M01AE N02BA N02BE N05B N05C R03 R06
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Sales) 8.80 9.78 5.79 92.3 14.6 3.00 30.7 15.6
ets_mape <- ets_ac |>
select(.model, Drug, MAPE) |>
pivot_wider(names_from = Drug, values_from = MAPE)
ets_mape
## # A tibble: 1 × 9
## .model M01AB M01AE N02BA N02BE N05B N05C R03 R06
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Sales) 22.4 34.0 27.7 48.2 24.4 Inf 69.4 161.
## Prophet Models
# Fit the models
prophet_fit <- train |>
model(prophet(Sales))
prophet_fit
## # A mable: 8 x 2
## # Key: Drug [8]
## Drug `prophet(Sales)`
## <chr> <model>
## 1 M01AB <prophet>
## 2 M01AE <prophet>
## 3 N02BA <prophet>
## 4 N02BE <prophet>
## 5 N05B <prophet>
## 6 N05C <prophet>
## 7 R03 <prophet>
## 8 R06 <prophet>
# Generate forecasts for 52 weeks
prophet_fc <- prophet_fit |> forecast(h = 52)
# Plot forecasts against actual values
prophet_fc |>
autoplot(test, level = NULL) +
autolayer(
filter_index(df_drug_weekly, "2018 W42" ~ .),
colour = "black"
) +
labs(
y = "Sales",
title = "Prophet Forecasts for weekly Drug Sales (for test data)"
) +
guides(colour = guide_legend(title = "Forecast")) +
facet_wrap(vars(Drug), scales = "free_y", ncol = 2)
## Plot variable not specified, automatically selected `.vars = Sales`
# calculate accuracy
prophet_ac <- accuracy(prophet_fc, test)
# select RMSE and MAPE and convert it to wide data
prophet_rmse <- prophet_ac |>
select(.model, Drug, RMSE) |>
pivot_wider(names_from = Drug, values_from = RMSE)
prophet_rmse
## # A tibble: 1 × 9
## .model M01AB M01AE N02BA N02BE N05B N05C R03 R06
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 prophet(Sales) 9.91 8.88 6.80 57.0 15.4 3.33 29.7 8.98
prophet_mape <- prophet_ac |>
select(.model, Drug, MAPE) |>
pivot_wider(names_from = Drug, values_from = MAPE)
prophet_mape
## # A tibble: 1 × 9
## .model M01AB M01AE N02BA N02BE N05B N05C R03 R06
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 prophet(Sales) 24.9 29.9 31.4 17.8 22.3 Inf 56.5 62.8
## comparison
rmse_all <- rbind(base_rmse, arima_rmse, ets_rmse, prophet_rmse)
rmse_all
## # A tibble: 6 × 9
## .model M01AB M01AE N02BA N02BE N05B N05C R03 R06
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mean 8.52 9.77 8.97 82.2 12.7 2.88 36.1 13.4
## 2 Naïve 9.27 9.97 5.65 88.0 30.8 3.35 30.7 12.9
## 3 Seasonal naïve 10.3 10.3 7.22 60.0 21.2 4.66 33.8 11.7
## 4 ARIMA(Sales) 8.82 9.74 5.62 70.5 15.1 2.86 30.3 11.7
## 5 ETS(Sales) 8.80 9.78 5.79 92.3 14.6 3.00 30.7 15.6
## 6 prophet(Sales) 9.91 8.88 6.80 57.0 15.4 3.33 29.7 8.98
mape_all <- rbind(base_mape, arima_mape, ets_mape, prophet_mape)
mape_all
## # A tibble: 6 × 9
## .model M01AB M01AE N02BA N02BE N05B N05C R03 R06
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mean 22.1 33.8 52.8 36.5 21.3 Inf 50.2 84.3
## 2 Naïve 23.0 36.8 28.4 44.6 56.2 Inf 72.6 93.5
## 3 Seasonal naïve 27.5 34.8 37.2 24.4 31.8 Inf 61.9 72.0
## 4 ARIMA(Sales) 22.4 33.4 28.8 29.4 25.1 Inf 67.6 77.5
## 5 ETS(Sales) 22.4 34.0 27.7 48.2 24.4 Inf 69.4 161.
## 6 prophet(Sales) 24.9 29.9 31.4 17.8 22.3 Inf 56.5 62.8